## setup

# NOTE: this script intentionally does not call rm(list = ls()).
# Clearing the global environment deletes whatever the caller had in their
# workspace when the script is sourced, and it still does not give a clean
# session (attached packages and options persist). Every object used below is
# assigned before it is read, so nothing here depends on an empty workspace.
# For a truly clean run, start a fresh R session (e.g. via Rscript).

# load packages
library(sandwich)
library(lmtest)
library(ggplot2)
library(tidyverse)


# Read the experiment outcomes. read.csv2() uses ";" as field separator and
# "," as decimal mark by default.
dat <- read.csv2(
  file = "data_experiment_outcomes_csv.csv",
  encoding = "UTF-8"
)

## Coerce the analysis variables to numeric (defensive).
# The outcome (response) and the two predictors (immback, partisan) are passed
# through as.character() first, so that a column read as a factor is converted
# by its labels rather than by its internal level codes.
dat[c("response", "immback", "partisan")] <- lapply(
  dat[c("response", "immback", "partisan")],
  function(column) as.numeric(as.character(column))
)


# Keep only rows with a non-missing response (per the original note, these
# are the treated emails).
dat <- dat[complete.cases(dat$response), ]


# Number of non-responses (response == 0) in the Turkish condition
# (immback == 1). Rows with a missing immback are ignored.
turk_nonresp <- sum(dat$response == 0 & dat$immback == 1, na.rm = TRUE)

# Number of non-responses in the German condition (immback == 0).
# na.rm = TRUE mirrors the Turkish count above: without it, a single missing
# immback would turn every count below into NA and break the row selection
# that relies on them.
german_nonresp <- sum(dat$response == 0 & dat$immback == 0, na.rm = TRUE)

# Whole number of German non-responses assumed to have landed in spam, for an
# assumed spam share of 5%, 10%, 15% and 20%.
german_nonresp_spam05 <- round(german_nonresp * .05)
german_nonresp_spam10 <- round(german_nonresp * .1)
german_nonresp_spam15 <- round(german_nonresp * .15)
german_nonresp_spam20 <- round(german_nonresp * .2)

# Random tie-breaker used when sorting, so that which non-responses get
# dropped is random; the fixed seed makes it reproducible.
set.seed(3209)
dat$rn <- rnorm(nrow(dat))


### when the share of German non-responses in Spam is 5% ###

# Sort ascending by response, then immback, then the random tie-breaker, so
# that German non-responses (response == 0, immback == 0) come first
# (assumes both variables are coded 0/1). Then drop the assumed number of
# German non-responses that went to spam.
dat <- dat[order(dat$response, dat$immback, dat$rn), ]
dat5 <- dat[seq_len(nrow(dat)) > german_nonresp_spam05, ]

# Number of Turkish non-responses assumed to be in spam, for spam shares from
# 10% to 55%. Rounded to whole rows, like the German counts: a fractional
# count used as the start of `a:b` makes the sequence stop short of `b`, which
# silently drops the last row of the data, and makes the number of dropped
# rows depend on floating-point noise.
turk_nonresp_spam <- round(turk_nonresp * seq(.1, .55, .05))

# Re-sort so that Turkish non-responses (immback == 1, response == 0) come
# first, again in random order within group.
dat5 <- dat5[order(-dat5$immback, dat5$response, dat5$rn), ]

# For each assumed count, drop that many Turkish non-responses, regress
# response on immback, and keep the estimate and HC1 robust standard error of
# the immback coefficient. Result: 2 x 10 matrix (rows: estimate, std. error).
mods5 <- vapply(turk_nonresp_spam, function(n_drop) {

  # logical mask instead of (n_drop + 1):nrow(): also correct for n_drop == 0
  kept <- dat5[seq_len(nrow(dat5)) > n_drop, ]
  mod <- lm(response ~ immback, data = kept)
  coeftest(mod, vcov. = vcovHC(mod, type = "HC1"))[2, 1:2]

}, numeric(2))


### when the share of German non-responses in Spam is 10% ###

# Sort ascending by response, then immback, then the random tie-breaker, so
# that German non-responses (response == 0, immback == 0) come first
# (assumes both variables are coded 0/1). Then drop the assumed number of
# German non-responses that went to spam.
dat <- dat[order(dat$response, dat$immback, dat$rn), ]
dat10 <- dat[seq_len(nrow(dat)) > german_nonresp_spam10, ]

# Number of Turkish non-responses assumed to be in spam, for spam shares from
# 15% to 60%. Rounded to whole rows, like the German counts: a fractional
# count used as the start of `a:b` makes the sequence stop short of `b`, which
# silently drops the last row of the data, and makes the number of dropped
# rows depend on floating-point noise.
turk_nonresp_spam <- round(turk_nonresp * seq(.15, .6, .05))

# Re-sort so that Turkish non-responses (immback == 1, response == 0) come
# first, again in random order within group.
dat10 <- dat10[order(-dat10$immback, dat10$response, dat10$rn), ]

# For each assumed count, drop that many Turkish non-responses, regress
# response on immback, and keep the estimate and HC1 robust standard error of
# the immback coefficient. Result: 2 x 10 matrix (rows: estimate, std. error).
mods10 <- vapply(turk_nonresp_spam, function(n_drop) {

  # logical mask instead of (n_drop + 1):nrow(): also correct for n_drop == 0
  kept <- dat10[seq_len(nrow(dat10)) > n_drop, ]
  mod <- lm(response ~ immback, data = kept)
  coeftest(mod, vcov. = vcovHC(mod, type = "HC1"))[2, 1:2]

}, numeric(2))


### when the share of German non-responses in Spam is 15% ###

# Sort ascending by response, then immback, then the random tie-breaker, so
# that German non-responses (response == 0, immback == 0) come first
# (assumes both variables are coded 0/1). Then drop the assumed number of
# German non-responses that went to spam.
dat <- dat[order(dat$response, dat$immback, dat$rn), ]
dat15 <- dat[seq_len(nrow(dat)) > german_nonresp_spam15, ]

# Number of Turkish non-responses assumed to be in spam, for spam shares from
# 20% to 65%. Rounded to whole rows, like the German counts: a fractional
# count used as the start of `a:b` makes the sequence stop short of `b`, which
# silently drops the last row of the data, and makes the number of dropped
# rows depend on floating-point noise.
turk_nonresp_spam <- round(turk_nonresp * seq(.2, .65, .05))

# Re-sort so that Turkish non-responses (immback == 1, response == 0) come
# first, again in random order within group.
dat15 <- dat15[order(-dat15$immback, dat15$response, dat15$rn), ]

# For each assumed count, drop that many Turkish non-responses, regress
# response on immback, and keep the estimate and HC1 robust standard error of
# the immback coefficient. Result: 2 x 10 matrix (rows: estimate, std. error).
mods15 <- vapply(turk_nonresp_spam, function(n_drop) {

  # logical mask instead of (n_drop + 1):nrow(): also correct for n_drop == 0
  kept <- dat15[seq_len(nrow(dat15)) > n_drop, ]
  mod <- lm(response ~ immback, data = kept)
  coeftest(mod, vcov. = vcovHC(mod, type = "HC1"))[2, 1:2]

}, numeric(2))


### when the share of German non-responses in Spam is 20% ###

# Sort ascending by response, then immback, then the random tie-breaker, so
# that German non-responses (response == 0, immback == 0) come first
# (assumes both variables are coded 0/1). Then drop the assumed number of
# German non-responses that went to spam.
dat <- dat[order(dat$response, dat$immback, dat$rn), ]
dat20 <- dat[seq_len(nrow(dat)) > german_nonresp_spam20, ]

# Number of Turkish non-responses assumed to be in spam, for spam shares from
# 25% to 70%. Rounded to whole rows, like the German counts: a fractional
# count used as the start of `a:b` makes the sequence stop short of `b`, which
# silently drops the last row of the data, and makes the number of dropped
# rows depend on floating-point noise.
turk_nonresp_spam <- round(turk_nonresp * seq(.25, .7, .05))

# Re-sort so that Turkish non-responses (immback == 1, response == 0) come
# first, again in random order within group.
dat20 <- dat20[order(-dat20$immback, dat20$response, dat20$rn), ]

# For each assumed count, drop that many Turkish non-responses, regress
# response on immback, and keep the estimate and HC1 robust standard error of
# the immback coefficient. Result: 2 x 10 matrix (rows: estimate, std. error).
mods20 <- vapply(turk_nonresp_spam, function(n_drop) {

  # logical mask instead of (n_drop + 1):nrow(): also correct for n_drop == 0
  kept <- dat20[seq_len(nrow(dat20)) > n_drop, ]
  mod <- lm(response ~ immback, data = kept)
  coeftest(mod, vcov. = vcovHC(mod, type = "HC1"))[2, 1:2]

}, numeric(2))


## Stack the estimates into one data frame: one row per fitted model, in the
## order 5%, 10%, 15%, 20% German spam share (ten models each).
ests <- data.frame(rbind(t(mods5), t(mods10), t(mods15), t(mods20)))
colnames(ests) <- c("coef", "se")

# assumed spam share among German non-responses (numeric)
ests[["baseline"]] <- rep(c(.05, .1, .15, .2), each = 10)

# the same share as a facet label for the plot
ests[["baseline2"]] <- rep(
  c(
    "Share of Spam in\nGerman Condition = .05",
    "Share of Spam in\nGerman Condition = .1",
    "Share of Spam in\nGerman Condition = .15",
    "Share of Spam in\nGerman Condition = .2"
  ),
  each = 10
)

# assumed spam share among Turkish non-responses, matching the grids used
# when the models were fitted
ests[["share_spam"]] <- unlist(Map(
  seq,
  c(.10, .15, .20, .25),
  c(.55, .6, .65, .7),
  .05
))

# Turkish spam share minus German spam share
ests[["diff"]] <- with(ests, share_spam - baseline)

## Plot: point estimate of the immback coefficient with a +/- 1.96 * SE
## interval against the Turkish-minus-German spam share, one panel per assumed
## German spam share; the dashed line marks a zero effect.
ggplot(ests, aes(diff, coef)) +
  geom_point() +
  geom_errorbar(
    aes(ymin = coef - 1.96 * se, ymax = coef + 1.96 * se),
    width = 0
  ) +
  geom_hline(yintercept = 0, linetype = "dashed") +
  facet_wrap(~ baseline2, scales = "free_x") +
  labs(
    x = "Difference in Share of Spam in Turkish Condition\nRelative to German Condition",
    y = "Treatment Effect"
  ) +
  theme_bw() +
  theme(
    text = element_text(size = 14),
    panel.grid = element_blank()
  )

